In [1]:
import sys
import scanpy as sc
import anndata
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
import seaborn as sns

import cell2location
import scvi

from matplotlib import rcParams
rcParams['pdf.fonttype'] = 42 # enables correct plotting of text for PDFs

print("Import done!")
Global seed set to 0
Import done!
In [2]:
#loading visium data
adata_vis = sc.read_visium("./data/Hamstring/Ham1/", count_file='filtered_feature_bc_matrix.h5')
adata_vis.obs['sample'] = list(adata_vis.uns['spatial'].keys())[0]
adata_vis
/opt/lramosmu/miniconda3/envs/test_scvi16_cuda113/lib/python3.9/site-packages/anndata/_core/anndata.py:1830: UserWarning: Variable names are not unique. To make them unique, call `.var_names_make_unique`.
  utils.warn_names_duplicates("var")
Out[2]:
AnnData object with n_obs × n_vars = 2981 × 36601
    obs: 'in_tissue', 'array_row', 'array_col', 'sample'
    var: 'gene_ids', 'feature_types', 'genome'
    uns: 'spatial'
    obsm: 'spatial'
In [3]:
sc.pl.spatial(adata=adata_vis)
In [5]:
adata_vis.var
Out[5]:
gene_ids feature_types genome
MIR1302-2HG ENSG00000243485 Gene Expression GRCh38
FAM138A ENSG00000237613 Gene Expression GRCh38
OR4F5 ENSG00000186092 Gene Expression GRCh38
AL627309.1 ENSG00000238009 Gene Expression GRCh38
AL627309.3 ENSG00000239945 Gene Expression GRCh38
... ... ... ...
AC141272.1 ENSG00000277836 Gene Expression GRCh38
AC023491.2 ENSG00000278633 Gene Expression GRCh38
AC007325.1 ENSG00000276017 Gene Expression GRCh38
AC007325.4 ENSG00000278817 Gene Expression GRCh38
AC007325.2 ENSG00000277196 Gene Expression GRCh38

36601 rows × 3 columns

In [6]:
adata_vis.var_names_make_unique()
# find mitochondria-encoded (MT) genes
adata_vis.var['mt'] = adata_vis.var_names.str.startswith("MT-")
# ribosomal genes
adata_vis.var['ribo'] = adata_vis.var_names.str.startswith(("RPS","RPL"))
# hemoglobin genes.
adata_vis.var['hb'] = adata_vis.var_names.str.contains(("^HB[^(P)]"))
sc.pp.calculate_qc_metrics(adata_vis, qc_vars=["mt"], inplace=True)
adata_vis.var
OMP: Info #276: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.
Out[6]:
gene_ids feature_types genome mt ribo hb n_cells_by_counts mean_counts log1p_mean_counts pct_dropout_by_counts total_counts log1p_total_counts
MIR1302-2HG ENSG00000243485 Gene Expression GRCh38 False False False 0 0.000000 0.000000 100.000000 0.0 0.000000
FAM138A ENSG00000237613 Gene Expression GRCh38 False False False 0 0.000000 0.000000 100.000000 0.0 0.000000
OR4F5 ENSG00000186092 Gene Expression GRCh38 False False False 0 0.000000 0.000000 100.000000 0.0 0.000000
AL627309.1 ENSG00000238009 Gene Expression GRCh38 False False False 1 0.001006 0.001006 99.966454 3.0 1.386294
AL627309.3 ENSG00000239945 Gene Expression GRCh38 False False False 0 0.000000 0.000000 100.000000 0.0 0.000000
... ... ... ... ... ... ... ... ... ... ... ... ...
AC141272.1 ENSG00000277836 Gene Expression GRCh38 False False False 0 0.000000 0.000000 100.000000 0.0 0.000000
AC023491.2 ENSG00000278633 Gene Expression GRCh38 False False False 0 0.000000 0.000000 100.000000 0.0 0.000000
AC007325.1 ENSG00000276017 Gene Expression GRCh38 False False False 0 0.000000 0.000000 100.000000 0.0 0.000000
AC007325.4 ENSG00000278817 Gene Expression GRCh38 False False False 13 0.005703 0.005687 99.563905 17.0 2.890372
AC007325.2 ENSG00000277196 Gene Expression GRCh38 False False False 0 0.000000 0.000000 100.000000 0.0 0.000000

36601 rows × 12 columns

In [7]:
adata_vis
Out[7]:
AnnData object with n_obs × n_vars = 2981 × 36601
    obs: 'in_tissue', 'array_row', 'array_col', 'sample', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'pct_counts_in_top_50_genes', 'pct_counts_in_top_100_genes', 'pct_counts_in_top_200_genes', 'pct_counts_in_top_500_genes', 'total_counts_mt', 'log1p_total_counts_mt', 'pct_counts_mt'
    var: 'gene_ids', 'feature_types', 'genome', 'mt', 'ribo', 'hb', 'n_cells_by_counts', 'mean_counts', 'log1p_mean_counts', 'pct_dropout_by_counts', 'total_counts', 'log1p_total_counts'
    uns: 'spatial'
    obsm: 'spatial'
In [8]:
sns.jointplot(
    data=adata_vis.obs,
    x="log1p_total_counts",
    y="log1p_n_genes_by_counts",
    kind="hex",
)
Out[8]:
<seaborn.axisgrid.JointGrid at 0x7fab3c245be0>
In [9]:
fig, axs = plt.subplots(1, 4, figsize=(15, 4))
sns.histplot(adata_vis.obs["total_counts"], kde=False, ax=axs[0])
sns.histplot(adata_vis.obs["total_counts"][adata_vis.obs["total_counts"] < 200], kde=False, bins=40, ax=axs[1])
sns.histplot(adata_vis.obs["n_genes_by_counts"], kde=False, bins=60, ax=axs[2])
sns.histplot(adata_vis.obs["n_genes_by_counts"][adata_vis.obs["n_genes_by_counts"] < 200], kde=False, bins=60, ax=axs[3])
Out[9]:
<AxesSubplot:xlabel='n_genes_by_counts', ylabel='Count'>
In [147]:
adata_vis = adata_vis_orig
adata_vis_orig
Out[147]:
AnnData object with n_obs × n_vars = 2818 × 36601
    obs: 'in_tissue', 'array_row', 'array_col', 'sample', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'pct_counts_in_top_50_genes', 'pct_counts_in_top_100_genes', 'pct_counts_in_top_200_genes', 'pct_counts_in_top_500_genes', 'total_counts_mt', 'log1p_total_counts_mt', 'pct_counts_mt', 'n_counts'
    var: 'gene_ids', 'feature_types', 'genome', 'mt', 'ribo', 'hb', 'n_cells_by_counts', 'mean_counts', 'log1p_mean_counts', 'pct_dropout_by_counts', 'total_counts', 'log1p_total_counts'
    uns: 'spatial'
    obsm: 'spatial'
In [148]:
sc.pp.filter_cells(adata_vis, min_counts=20)
#sc.pp.filter_cells(adata, max_counts=35000)
adata_vis = adata_vis[adata_vis.obs["pct_counts_mt"] < 25]
print(f"#cells after MT filter: {adata_vis.n_obs}")
sc.pp.filter_genes(adata_vis, min_cells=2)
#cells after MT filter: 2637
/opt/lramosmu/miniconda3/envs/test_scvi16_cuda113/lib/python3.9/site-packages/scanpy/preprocessing/_simple.py:251: ImplicitModificationWarning: Trying to modify attribute `.var` of view, initializing view as actual.
  adata.var['n_cells'] = number
In [149]:
sc.pl.spatial(adata_vis, img_key="hires", color='total_counts', spot_size=40, vmax=1000)
In [150]:
sc.pp.normalize_total(adata_vis, inplace=True)
sc.pp.log1p(adata_vis)
sc.pp.highly_variable_genes(adata_vis, flavor="seurat", n_top_genes=1000)
In [151]:
adata_vis
Out[151]:
AnnData object with n_obs × n_vars = 2637 × 13134
    obs: 'in_tissue', 'array_row', 'array_col', 'sample', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'pct_counts_in_top_50_genes', 'pct_counts_in_top_100_genes', 'pct_counts_in_top_200_genes', 'pct_counts_in_top_500_genes', 'total_counts_mt', 'log1p_total_counts_mt', 'pct_counts_mt', 'n_counts'
    var: 'gene_ids', 'feature_types', 'genome', 'mt', 'ribo', 'hb', 'n_cells_by_counts', 'mean_counts', 'log1p_mean_counts', 'pct_dropout_by_counts', 'total_counts', 'log1p_total_counts', 'n_cells', 'highly_variable', 'means', 'dispersions', 'dispersions_norm'
    uns: 'spatial', 'log1p', 'hvg'
    obsm: 'spatial'
In [156]:
#sc.pp.pca(adata_vis)
#sc.pp.neighbors(adata_vis)
#sc.tl.umap(adata_vis)
sc.tl.leiden(adata_vis, resolution=0.6, key_added="clusters")
In [157]:
plt.rcParams["figure.figsize"] = (4, 4)
sc.pl.umap(adata_vis, color=["total_counts", "n_genes_by_counts", "clusters"], wspace=0.4)
In [160]:
plt.rcParams["figure.figsize"] = (8, 8)
sc.pl.spatial(adata_vis, img_key="hires", color=["total_counts", "n_genes_by_counts"], size = 1.5)
In [161]:
sc.pl.spatial(adata_vis, img_key="hires", color="clusters", size=1.5)
In [163]:
markers = ['COL1A2', 'TTN', 'COMP', 'MKX', 'PECAM1']
sc.pl.dotplot(adata_vis, markers, groupby='clusters', dendrogram=True, title="Known Cell Markers")
In [166]:
#code to crop part of the image. Our hires image is 2000x2000 pixels. Top left is 0,0.
#Coordinates to use for cropping the image (left, right, top, bottom)
sc.pl.spatial(adata_vis, img_key="hires", color="clusters", groups=["0","2", "1", "5"], crop_coord=[2200, 3000, 900, 1600], size=1.4, title="Muscle-like Area")
/opt/lramosmu/miniconda3/envs/test_scvi16_cuda113/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:1171: FutureWarning: Categorical.replace is deprecated and will be removed in a future version. Use Series.replace directly instead.
  values = values.replace(values.categories.difference(groups), np.nan)
In [167]:
sc.tl.rank_genes_groups(adata_vis, "clusters", method="t-test")
sc.pl.rank_genes_groups_heatmap(adata_vis, groups="6", n_genes=10, groupby="clusters")
sc.pl.rank_genes_groups_heatmap(adata_vis, groups="5", n_genes=10, groupby="clusters")
sc.pl.rank_genes_groups_heatmap(adata_vis, groups="4", n_genes=10, groupby="clusters")
sc.pl.rank_genes_groups_heatmap(adata_vis, groups="3", n_genes=10, groupby="clusters")
sc.pl.rank_genes_groups_heatmap(adata_vis, groups="2", n_genes=10, groupby="clusters")
sc.pl.rank_genes_groups_heatmap(adata_vis, groups="1", n_genes=10, groupby="clusters")
sc.pl.rank_genes_groups_heatmap(adata_vis, groups="0", n_genes=10, groupby="clusters")
WARNING: Groups are not reordered because the `groupby` categories and the `var_group_labels` are different.
categories: 0, 1, 2, etc.
var_group_labels: 6
WARNING: Groups are not reordered because the `groupby` categories and the `var_group_labels` are different.
categories: 0, 1, 2, etc.
var_group_labels: 5
WARNING: Groups are not reordered because the `groupby` categories and the `var_group_labels` are different.
categories: 0, 1, 2, etc.
var_group_labels: 4
WARNING: Groups are not reordered because the `groupby` categories and the `var_group_labels` are different.
categories: 0, 1, 2, etc.
var_group_labels: 3
WARNING: Groups are not reordered because the `groupby` categories and the `var_group_labels` are different.
categories: 0, 1, 2, etc.
var_group_labels: 2
WARNING: Groups are not reordered because the `groupby` categories and the `var_group_labels` are different.
categories: 0, 1, 2, etc.
var_group_labels: 1
WARNING: Groups are not reordered because the `groupby` categories and the `var_group_labels` are different.
categories: 0, 1, 2, etc.
var_group_labels: 0
In [168]:
sc.pl.spatial(adata_vis, img_key="hires", color=["TNNT1", "TNNT3"], crop_coord=[2200, 3000, 900, 1600], color_map="magma", vmax=1)
In [181]:
sc.pl.spatial(adata_vis, img_key="hires", color=["PECAM1", "LYVE1"], size=1.5, vmax=1)
In [201]:
#code to crop part of the image. Our hires image is 2000x2000 pixels. Top left is 0,0.
#Coordinates to use for cropping the image (left, right, top, bottom)
sc.pl.spatial(adata_vis, img_key="hires", color=["PECAM1", "LYVE1", "NOTCH3"], crop_coord=[1000, 2000, 700, 1900], size=1.5, vmax=1)
In [186]:
#specific markers
sc.pl.spatial(adata_vis, img_key="hires", color=["NOTCH3"], size=1.5, vmax=1)
In [187]:
#specific markers
sc.pl.spatial(adata_vis, img_key="hires", color=["COL1A2", "DCN"], size=1.5, vmax=2)
In [191]:
#specific markers
sc.pl.spatial(adata_vis, img_key="hires", color=["PTPRC", "CD163"], size=1.5)
In [192]:
#specific markers
sc.pl.spatial(adata_vis, img_key="hires", color=["CD68", "CX3CR1"], size=1.5)
In [194]:
#specific markers
sc.pl.spatial(adata_vis, img_key="hires", color=["TRDN", "TNNT1", "TNNT3"], size=1.5, vmax=1)
In [203]:
sc.pl.spatial(adata_vis, img_key="hires", color=["TRDN","TNNT1", "TNNT3"], crop_coord=[2200, 3000, 900, 1600], vmax=1, size=1.3)
In [ ]: